package cn.datawin.task.pipe;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import cn.datawin.spider.httputil.AsyncHttpClientService;
import cn.datawin.spider.httputil.HttpRequest;
import cn.datawin.spider.httputil.HttpResponse;
import cn.datawin.spider.httputil.HttpRequest.Method;
import cn.datawin.spider.page.Page;
import cn.datawin.spider.pipeline.PipeLine;
import cn.datawin.spider.processor.Processor;
import cn.datawin.spider.util.UrlUtil;
import cn.datawin.task.Config;
import cn.datawin.task.Task;
import cn.datawin.task.util.HttpUtil;
import cn.datawin.task.util.TaskUtil;

/**
 * Pipeline stage that turns the URLs extracted by a {@link Processor} into
 * follow-up {@link Task}s and posts them, in batches, to the task service at
 * {@code Config.apps.get("url") + "/task/inserttask"}.
 */
public class PagePipeLine implements PipeLine{

	/**
	 * HTTP client used to post task batches. It is shut down and set to
	 * {@code null} at the end of every {@link #pipe(Processor)} call and is
	 * re-created on demand by {@link #postStr(String, String, Map)}.
	 */
	AsyncHttpClientService client = new AsyncHttpClientService();
	
	/**
	 * Converts every entry of {@code process.getList()} into a task cloned from
	 * {@code process.getTask()} and posts the tasks in batches of
	 * {@code postTaskNum} (read from {@code Config.apps}).
	 *
	 * <p>Original design notes: url; page rule replacement; {@code ?page=};
	 * pages; regex replacement or other replacement strategies.
	 *
	 * <p>Any exception is printed and swallowed, which aborts the remaining
	 * URLs of this call. The HTTP client is always shut down afterwards.
	 *
	 * @param process processor holding the parsed page, the extracted URL list
	 *                and the task that produced them
	 */
	public void pipe(Processor process) {
		try {
			Page page = process.getPage();
			System.out.println("results======="+process.getParams());
			System.out.println("nextUrl======="+process.getList());
			Task task = (Task) process.getTask();
			// TODO: distinguish url vs. data results (data / page / type) by the parse mode flag
			List<Object> objects = process.getList();
			if (null == objects) {
				return;
			}
			int postTaskNum = Integer.parseInt(Config.apps.get("postTaskNum"));
			List<Task> tasks = new ArrayList<Task>();
			for (Object object : objects) {
				Task tt = urlToTask(object.toString(), (Task) task.clone(), page);
				if (tt == null) {
					continue;
				}
				tasks.add(tt);
				if (tasks.size() == postTaskNum) {
					// Batch is full: post it (addTasks empties the list).
					addTasks(tasks);
				}
			}
			// Post the remaining partial batch. Doing this after the loop (instead of on the
			// last index) makes sure it is sent even when the last URL was skipped, and
			// avoids posting an empty batch when the last URL exactly filled a batch.
			if (!tasks.isEmpty()) {
				addTasks(tasks);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Guard against a client that was already released by a previous call.
			if (client != null) {
				client.shutdown();
				client = null;
			}
		}
	}

	/**
	 * Points {@code task} at {@code url} and advances it to its next processor.
	 *
	 * @param url  extracted link; used as-is when it starts with {@code "http"},
	 *             otherwise resolved against the URL of the request that produced {@code page}
	 * @param task task to update (the caller passes a clone)
	 * @param page page the link was extracted from
	 * @return the updated {@code task}, or {@code null} when the link does not start with
	 *         {@code "http"} and contains {@code "javascript"}
	 * @throws Exception propagated from URL canonicalization or the task accessors
	 */
	private Task urlToTask(String url, Task task,Page page)throws Exception {
		if (url.startsWith("http")) {
			task.setUrl(url);
		} else if (url.contains("javascript")) {
			return null;
		} else {
			task.setUrl(UrlUtil.canonicalizeUrl(url, page.getHttpRequest().getUrl()));
		}
		
		task.setProcessor(task.nextProcessor());
		task.setClient(task.getClient());
		return task;
	}
	
	
	/**
	 * Sends a POST request and returns the response body as a string.
	 *
	 * @param url     target URL
	 * @param charset charset name used to decode the response
	 * @param params  form parameters to post
	 * @return the response body decoded with {@code charset}
	 * @throws Exception if the request fails or the response cannot be decoded
	 */
	public  String postStr(String url,String charset, Map<String, String> params) throws Exception{
		// pipe() releases the client when it finishes; re-create it so this instance stays usable.
		if (client == null) {
			client = new AsyncHttpClientService();
		}
		HttpRequest request = new HttpRequest(url, Method.post);
		request.setPostParams(params);
		HttpResponse res = client.execute(request);
		return res.getResponseString(charset);
	}
	
	/**
	 * Serializes {@code tasks} and their rules and posts them to the task service.
	 * The list is cleared before the request is sent.
	 *
	 * @param tasks batch to post; emptied by this call
	 * @return the response body of the insert request
	 * @throws Exception if serialization or the HTTP request fails
	 */
	public String addTasks(List<Task> tasks) throws Exception{
		Map<String, String> obj = new HashMap<String, String>();
		obj.put("serializeList", HttpUtil.serialize(tasks));
		List<List<Map<String, Object>>> rules = new ArrayList<List<Map<String,Object>>>();
		for (Task task : tasks) {
			rules.add(task.getRules());
		}
		obj.put("rules", HttpUtil.serialize(rules));
		tasks.clear();  // empty the batch so the caller can reuse the list
		return addtask(obj);
	}
	
	/**
	 * Posts an already serialized batch to {@code <Config.apps "url">/task/inserttask}
	 * and decodes the response as UTF-8.
	 *
	 * @param map form parameters of the insert request
	 * @return the response body
	 * @throws Exception if the HTTP request fails
	 */
	public  String addtask(Map<String, String> map) throws Exception{
		return postStr(Config.apps.get("url")+"/task/inserttask","UTF-8",map);
	}
	
}
